Introduction
In this notebook we demonstrate how to use Milo to detect abherrant cell states in diseased tissues, using a dataset of hepatic non-parenchymal cells isolated from 5 healthy and 5 cirrhotic human livers. Ramachandran et al. 2019 (GEO accessiion: GSE136103).
Installation
Added by DRT 2021-12-03
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
bioc_pkgs <- setdiff(
c("SingleCellExperiment","scater","scran","Seurat"),
installed.packages()[,1]
)
BiocManager::install(bioc_pkgs)
'getOption("repos")' replaces Bioconductor standard repositories, see '?repositories' for details
replacement repositories:
CRAN: https://cran.rstudio.com/
Bioconductor version 3.12 (BiocManager 1.30.16), R 4.0.3 (2020-10-10)
Old packages: 'brio', 'cpp11', 'devtools', 'digest', 'dtplyr', 'fs', 'glue', 'irlba', 'pkgbuild', 'pkgload',
'readr', 'remotes', 'stringi', 'testthat', 'uwot', 'vroom', 'withr', 'xml2'
Update all/some/none? [a/s/n]:
n
getPackageIfNeeded <- function(pkg) {
if (!require(pkg, character.only=TRUE))
install.packages(pkgs=pkg, dependencies=TRUE)
}
# determine missing R libraries
pkgs <- setdiff(
c("tidyverse","miloR","patchwork","igraph",
"RColorBrewer","cowplot","devtools"),
installed.packages()[,1]
)
invisible(sapply(pkgs,getPackageIfNeeded))
if(!require(c("miloR"), character.only=TRUE)) devtools::install_github("MarioniLab/miloR")
Loading required package: miloR
Loading required package: edgeR
Loading required package: limma
# devtools::install_github("MarioniLab/miloR")
suppressPackageStartupMessages({
library(tidyverse)
# library(irlba)
# library(DropletUtils)
library(scater)
library(scran)
# library(Seurat) ## just 4 loading the object
library(miloR)
library(SingleCellExperiment)
library(patchwork)
library(igraph)
library(RColorBrewer)
library(cowplot)
})
Warning: package ‘scater’ was built under R version 4.0.4
Warning: package ‘BiocGenerics’ was built under R version 4.0.5
Warning: package ‘GenomeInfoDb’ was built under R version 4.0.5
Warning: package ‘scran’ was built under R version 4.0.5
Load data
We downloaded the dataset and annotations stored in Seurat object from here, as indicated by the authors.
# load("/nfs/team205/ed6/data/Ramachandran2019_liver/tissue.rdata")
load(url("https://www.dropbox.com/s/bq816h74gmh84gu/tissue.rdata?dl=1"))
Loading required package: Seurat
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Registered S3 method overwritten by 'spatstat.geom':
method from
print.boxx cli
Attaching SeuratObject
Attaching package: ‘Seurat’
The following object is masked from ‘package:SummarizedExperiment’:
Assays
## Convert to SingleCellExperiment
liver_sce <- SingleCellExperiment(assay = list(counts=tissue@raw.data, logcounts=tissue@data),
colData = tissue@meta.data)
liver_sce
class: SingleCellExperiment
dim: 23498 58358
metadata(0):
assays(2): counts logcounts
rownames(23498): FO538757.2 AP006222.2 ... CTA-126B4.7 LINC01423
rowData names(0):
colnames(58358): Healthy1_Cd45+_AAACCTGCAGTATCTG Healthy1_Cd45+_AACTGGTTCATGGTCA ...
Cirrhotic3_Cd45-_TTTGTCATCCAGGGCT Cirrhotic3_Cd45-_TCTGGAAGTCATCCCT
colData names(10): nGene nUMI ... annotation_indepth annotation_lineage
reducedDimNames(0):
altExpNames(0):
Preprocessing
We use the same number of highly variable genes and principal components used by the authors of the original study.
Feature selection
Select highly variable genes
dec_liver <- modelGeneVar(liver_sce)
fit_liver <- metadata(dec_liver)
plot(fit_liver$mean, fit_liver$var, xlab="Mean of log-expression",
ylab="Variance of log-expression")

hvgs <- getTopHVGs(dec_liver, n=3000)
Dimensionality reduction
set.seed(42)
liver_sce <- runPCA(liver_sce, subset_row=hvgs, ncomponents=11)
liver_sce <- runUMAP(liver_sce, dimred="PCA", ncomponents=2)
scater::plotUMAP(liver_sce, colour_by="condition", point_alpha=1, point_size=0.5)

scater::plotUMAP(liver_sce, colour_by="dataset", point_alpha=0.3, point_size=0.5)

scater::plotUMAP(liver_sce, colour_by="annotation_lineage", point_alpha=0.3, point_size=0.5, text_by='annotation_lineage')
Warning: Removed 1 rows containing missing values (geom_text).

Notably, this dataset doesn’t appear to display a batch effect
Note: This step is not required. It generates a large file (even larger than raw data) and then reloads it.
saveRDS(liver_sce, "~/dropbox-vu/temp/milo_data/Ramachandran2019_liver/liver_SCE_20210225.RDS")
liver_sce <- readRDS(url("https://www.dropbox.com/s/z0aopf616b1urvj/liver_SCE_20210225.RDS?dl=1"))
Differential Abundance analysis with Milo
We test for differential abundance between healthy and cirrhotic livers. We start by defining neighbourhoods with refined sampling on the KNN graph. We inspect the size of neighbourhoods.
liver_milo <- Milo(liver_sce)
## Build KNN graph
liver_milo <- buildGraph(liver_milo, d = 11, k=30)
Constructing kNN graph with k:30
## Compute neighbourhoods with refined sampling
liver_milo <- makeNhoods(liver_milo, k=30, d=11, prop = 0.05, refined=TRUE)
Checking valid object
plotNhoodSizeHist(liver_milo, bins=150)

Then we make a design matrix for the differential test, assigning samples to biological conditions.
colData(liver_milo)[['sort']] <- str_remove(colData(liver_milo)[['dataset']], ".+_")
colData(liver_milo)[['sort']] <- str_remove(colData(liver_milo)[['sort']], "A|B")
liver_meta <- as_tibble(colData(liver_milo)[,c("dataset","condition", 'sort')])
liver_meta <- distinct(liver_meta) %>%
mutate(condition=factor(condition, levels=c("Uninjured", "Cirrhotic"))) %>%
column_to_rownames("dataset")
Now we can count cells in neighbourhoods and perform the DA test.
liver_milo <- countCells(liver_milo, samples = "dataset", meta.data = data.frame(colData(liver_milo)[,c("dataset","condition",'sort')]) )
Checking meta.data validity
Counting cells in neighbourhoods
liver_milo <- calcNhoodDistance(liver_milo, d=11)
milo_res <- testNhoods(liver_milo, design = ~ condition, design.df = liver_meta[colnames(nhoodCounts(liver_milo)),])
Using TMM normalisation
Performing spatial FDR correction withk-distance weighting
milo_res_sort <- testNhoods(liver_milo, design = ~ sort + condition, design.df =
liver_meta[colnames(nhoodCounts(liver_milo)),])
Using TMM normalisation
Performing spatial FDR correction withk-distance weighting
compare_da_df <- left_join(milo_res_sort, milo_res, by="Nhood", suffix=c("_sort", "_nosort")) %>%
{annotateNhoods(liver_milo, ., 'annotation_lineage')}
compare_da_df %>%
ggplot(aes(-log10(SpatialFDR_sort), -log10(SpatialFDR_nosort))) +
geom_point(size=0.8) +
geom_point(data=. %>% filter(annotation_lineage=="Endothelia"), color="red")

plot(milo_res_sort$SpatialFDR, milo_res$SpatialFDR)

Exploration of Milo DA results
We can start by looking at some basic stats
pval_hist <- milo_res %>%
ggplot(aes(PValue)) +
geom_histogram(bins=50) +
theme_bw(base_size=14)
volcano <-
milo_res %>%
ggplot(aes(logFC, -log10(SpatialFDR))) +
geom_point(size=0.4, alpha=0.2) +
geom_hline(yintercept = -log10(0.1)) +
xlab("log-Fold Change") +
theme_bw(base_size=14)
pval_hist + volcano

The distribution of P-values looks sensible and from the volcano plot we can see that we have identified some DA neighbourhoods at 10% FDR.
We can visualize DA neighbourhoods building an abstracted graph
liver_milo <- buildNhoodGraph(liver_milo)
plotNhoodGraphDA(liver_milo, milo_res, alpha = 0.1, size_range=c(2,6))

Note: This step is not required. It generates a large file (even larger than raw data) and then reloads it.
## Save milo object and results
saveRDS(liver_milo,"~/dropbox-vu/temp/milo_data/Ramachandran2019_liver/liver_milo_20210225.RDS")
write_csv(milo_res,"~/dropbox-vu/temp/milo_data/Ramachandran2019_liver/liver_results_20210225.csv")
# liver_milo <- readRDS("~/liver_milo_20201008.RDS")
liver_milo <- readRDS(url("https://www.dropbox.com/s/xdp07789c5hoen3/liver_milo_20210225.RDS?dl=1"))
# milo_res <- read_csv("/nfs/team205/ed6/data/Ramachandran2019_liver/liver_results_20201008.csv")
milo_res <- read_csv(url("https://www.dropbox.com/s/i1l3aep1py5wirf/liver_results_20210225.csv?dl=1"))
NOTE: hvgs (presumably highly variable genes) cannot be loaded since file not provided
## Load hvgs
hvgs <- scan("~/data/Ramachandran2019_liver/liver_milo_hvgs.txt", "")
Warning in file(file, "r") :
cannot open file '/Users/darren/data/Ramachandran2019_liver/liver_milo_hvgs.txt': No such file or directory
Error in file(file, "r") : cannot open the connection
Attempting to generate a set of highly variable genes
Making figures for the manuscript
Explore DA neighbourhoods by cell type
Next, we can check the cell types where we observe most differences between healthy and cirrhotic cells, by taking the most frequent cell type in each neighbourhood.
We first check that neighbourhoods are sufficiently homogeneous
Filter nhoods with homogeneous composition
I can recover all the clusters where DA was detected in the original paper
Close-up on Endothelial lineage
Close-up on Cholangiocytes
Filter out cells that show contamination from immune cells (expression of immune markers)
Differential Gene Expression analysis
In a subset of lineages, we want to test for differential expression between neighbourhoods enriched in cirrhotic cells and neighbourhoods enriched
NOTE: Code below here won’t work unless hvgs is defined
DRT: stopping run-through here
Should be able to run everything above.
Add nhood expression to speed-up plotting of heatmaps
liver_milo <- calcNhoodExpression(liver_milo, assay = "logcounts", subset.row = hvgs)
Endothelia
Rebuttal figure showcasing grouping
set.seed(42)
milo_res_endogroups <- groupNhoods(liver_milo, milo_res, max.lfc.delta = 2, overlap = 1)
p1 <- plotNhoodGroups(liver_milo, milo_res_endogroups,
size_range=c(1,3))
milo_res_endogroups <- annotateNhoods(liver_milo, milo_res_endogroups, 'annotation_lineage')
p2 <- plotDAbeeswarm(milo_res_endogroups, group.by = 'NhoodGroup') +
facet_grid(annotation_lineage~., scales="free", space="free")
## Plot expression in T cell neighbourhoods
markers_df <- read_csv("~/mount/gdrive/milo/STable3_Ramachandran.csv")
tcell_marker_genes <-
markers_df %>%
filter(cluster %in% c("Tcell", "ILC")) %>%
top_n(30, myAUC) %>%
pull(gene)
p3 <- plotNhoodExpressionGroups(liver_milo, milo_res_endogroups, features = unique(tcell_marker_genes),
subset.nhoods = milo_res_endogroups$NhoodGroup %in% c("3","10", "14"),
scale=TRUE, cluster_features = TRUE,show_rownames = TRUE
) +
theme(strip.text.x = element_text(angle=90))
(((p1 + theme())/ (p3 + theme(strip.text = element_text(size=10, angle=45)))) +
plot_layout(heights = c(1.1,1), guides="collect"
)|
(
p2 + theme_bw(base_size=16) + theme(strip.text.y = element_text(angle=0))
)) +
plot_layout(widths = c(1.4, 1)) +
plot_annotation(tag_levels = c("A", "C", "B") ) +
ggsave("~/mount/gdrive/milo/Figures/liver_v2/RFig_grouping.pdf", width=15, height = 12) +
ggsave("~/mount/gdrive/milo/Figures/liver_v2/RFig_grouping.png", width=15, height = 12)
Group endothelial cells by logFC and DA results
milo_res_endogroups$annotation_indepth[milo_res_endogroups$annotation_indepth_fraction < 0.6] <- NA
milo_res_endogroups$annotation_lineage[milo_res_endogroups$annotation_indepth_fraction < 0.6] <- NA
## Group neighbourhoods by DA outcome
milo_res_endogroups$NhoodGroup <- NA
milo_res_endogroups$NhoodGroup <- ifelse((milo_res_endogroups$annotation_lineage == "Endothelia") & (milo_res_endogroups$SpatialFDR < 0.1) & (milo_res_endogroups$logFC < -2.5), "54", milo_res_endogroups$NhoodGroup)
milo_res_endogroups$NhoodGroup <- ifelse((milo_res_endogroups$annotation_lineage == "Endothelia") & (milo_res_endogroups$SpatialFDR < 0.1) & (milo_res_endogroups$logFC > 2.5), "70", milo_res_endogroups$NhoodGroup)
liver_milo2 <- liver_milo
subset.nhoods <- str_detect(milo_res$annotation_indepth, "Endo")
reducedDim(liver_milo2, "UMAP")[colnames(endo_milo),] <- reducedDim(endo_milo, "UMAP")
endo_gr_groups <- plotNhoodGroups(liver_milo2, milo_res_endogroups[milo_res_endogroups$annotation_lineage=="Endothelia",],
show_groups = c("54", "70"),
size_range=c(1,4),
subset.nhoods = milo_res_endogroups$annotation_lineage=="Endothelia") +
scale_fill_manual(values=c("54"=brewer.pal(4, "Spectral")[2], "70"=brewer.pal(4, "Spectral")[3]),
labels=c("54"="Uninjured group", '70'= "Cirrhotic group"),
na.value="white",
name = "Nhood group"
) +
theme(legend.text = element_text(size=20), legend.title = element_text(size=22))
fig4_bright1 <- ((endo_umap + endo_gr) +
plot_layout(widths = c(1,2), guides="collect"
)) &
theme(legend.box = "horizontal", legend.position = "top", legend.direction = "vertical")
fig4_bright1
Calculate marker genes between the two groups
mito_genes <- str_detect(hvgs, "^MT-")
markers_df <- findNhoodGroupMarkers(liver_milo, da.res = milo_res_endogroups, assay="counts",
subset.nhoods = (milo_res_endogroups$NhoodGroup %in% c("54", "70")),
subset.groups = c("54", "70"),
subset.row = hvgs[!mito_genes],
aggregate.samples = TRUE, sample_col = "dataset"
)
milo_res_endogroups[milo_res_endogroups$NhoodGroup %in% c("54", "70"),]
colnames(markers_df) <- str_replace(colnames(markers_df), "70", "cirr")
colnames(markers_df) <- str_replace(colnames(markers_df), "54", "uninj")
Visualize as volcano
highlight_genes <- c("PLVAP", "VWA1", "ACKR1", "IL32",
"CLEC4G", "CLEC4M", "FCN2", "FCN3",
"LEF1")
marker.df <- markers_df
marker.df %>%
mutate(label=ifelse(GeneID %in% highlight_genes, GeneID, NA)) %>%
ggplot(aes(logFC_cirr, -log10(adj.P.Val_cirr),
# color=highlight
)) +
geom_point() +
geom_text(aes(label=label), color="red") +
xlab("logFC") + ylab("- log10(Adj. P value)") +
theme_bw(base_size = 22)
Visualize as heatmap
(gene expression values are scaled between 0 and 1 for each gene)
marker_genes <- marker.df %>%
dplyr::filter(adj.P.Val_cirr < 0.05) %>%
pull(GeneID)
fig4_bbright <-
plotNhoodExpressionDA(liver_milo, milo_res_endogroups, c(marker_genes), cluster_features = TRUE, assay = "counts",
alpha = 0.1,
scale_to_1 = TRUE,
subset.nhoods = milo_res_endogroups$NhoodGroup %in% c("54", "70"),
# grid.space = "free",
highlight_features = highlight_genes, show_rownames = FALSE
) +
ylab("DE genes")+
# facet_grid(.~NhoodGroup, scales="free", space="free")
theme(legend.text = element_text(size=22), legend.title = element_text(size=24)) +
plot_layout(heights = c(1,10)) & theme(legend.margin = margin(0,0,0,60), legend.background = element_blank())
pl3 <- fig4_bbright$data %>%
ggplot(aes(logFC_rank, 1,fill=logFC)) +
geom_tile() +
theme_classic(base_size=16) +
ylab("") +
scale_fill_gradient2(name="DA logFC") +
# scale_fill_manual(values=c("54"=brewer.pal(4, "Spectral")[2], "70"=brewer.pal(4, "Spectral")[3]),
# labels=c("54"="Uninjured group", '70'= "Cirrhotic group"),
# na.value="white",
# name = "Nhood group"
# ) +
scale_x_continuous(expand = c(0.01, 0)) +
theme(axis.text = element_blank(), axis.ticks = element_blank(), axis.line = element_blank(),
axis.title = element_blank())
fig4_bbright <- pl3 / fig4_bbright +
plot_layout(heights = c(1,20))
fig4_bbright
GO term analysis
go_endo_up <- em_res_up %>%
top_n(30, -log10(qvalue)) %>%
mutate(ID=ifelse(ID=='GO_ANTIGEN_PROCESSING_AND_PRESENTATION_OF_PEPTIDE_OR_POLYSACCHARIDE_ANTIGEN_VIA_MHC_CLASS_II', "GO_ANTIGEN_PRESENTATION_VIA_MHC_CLASS_II", ID)) %>%
mutate(Term=factor(ID, levels=rev(unique(ID)))) %>%
ggplot(aes(Term, -log10(qvalue))) +
geom_point() +
coord_flip() +
xlab("GO Biological Function") + ylab("-log10(Adj. p-value)") +
theme_bw(base_size=18) +
ggtitle("Cirrhotic endothelia")
go_endo_down <- em_res_down %>%
top_n(30, -log10(qvalue)) %>%
mutate(ID=ifelse(ID=='GO_ANTIGEN_PROCESSING_AND_PRESENTATION_OF_PEPTIDE_OR_POLYSACCHARIDE_ANTIGEN_VIA_MHC_CLASS_II', "GO_ANTIGEN_PRESENTATION_VIA_MHC_CLASS_II", ID)) %>%
mutate(Term=factor(ID, levels=rev(unique(ID)))) %>%
ggplot(aes(Term, -log10(qvalue))) +
geom_point() +
coord_flip() +
xlab("GO Biological Function") + ylab("-log10(Adj. p-value)") +
theme_bw(base_size=18) +
ggtitle("Uninjured endothelia")
go_endo_up
go_endo_down
em_res_up
em_res_down
Cholangiocytes
set.seed(42)
milo_res_cholgroups <- groupNhoods(liver_milo, milo_res, max.lfc.delta = 0.5, overlap = 1)
## Group neighbourhoods by DA outcome
milo_res_cholgroups$NhoodGroup <- NA
milo_res_cholgroups$NhoodGroup <- ifelse((milo_res_cholgroups$annotation_lineage == "Cholangiocytes") & (milo_res_cholgroups$SpatialFDR < 0.1) & (milo_res_cholgroups$logFC < -2.5), "38", milo_res_cholgroups$NhoodGroup)
milo_res_cholgroups$NhoodGroup <- ifelse((milo_res_cholgroups$annotation_lineage == "Cholangiocytes") & (milo_res_cholgroups$SpatialFDR < 0.1) & (milo_res_cholgroups$logFC > 2.5), "49", milo_res_cholgroups$NhoodGroup)
liver_milo2 <- liver_milo
subset.nhoods <- str_detect(milo_res$annotation_indepth, "Chol")
reducedDim(liver_milo2, "UMAP")[colnames(chol_milo),] <- reducedDim(chol_milo, "UMAP")
plotNhoodGroups(liver_milo2, milo_res_cholgroups[milo_res_cholgroups$annotation_lineage=="Cholangiocytes",],
show_groups = c("49","38"),
subset.nhoods = milo_res_cholgroups$annotation_lineage =="Cholangiocytes")
Calculate marker genes between the two groups
## Filter genes expressed in cholangiocytes
# chol_hvgs <- hvgs[(counts(chol_milo)[hvgs,] > 0) %>% {rowSums(.)/ncol(chol_milo)} > 0.01]
mito_genes <- str_detect(hvgs, "^MT-")
markers_df <- findNhoodGroupMarkers(liver_milo, da.res = milo_res_cholgroups, assay="counts",
subset.nhoods = milo_res_cholgroups$NhoodGroup %in%c("49","38"),
subset.groups = c("49","38"),
subset.row = hvgs[!mito_genes],
aggregate.samples = TRUE, sample_col = "dataset"
)
markers_df
milo_res_cholgroups[milo_res_cholgroups$NhoodGroup %in%c("49","38"),]
Visualize as volcano
marker.df.chol <- markers_df
volcano_chol <-
marker.df.chol %>%
mutate(up=ifelse(logFC_49 > 0, "up", "down")) %>%
group_by(up) %>%
mutate(label=ifelse(rank(adj.P.Val_49) < 15, GeneID, NA)) %>%
# mutate(label=ifelse((adj.P.Val_49 < 0.05 & logFC_49 < -3) | (adj.P.Val_49 < 0.05 & logFC_49 > 0), GeneID, NA)) %>%
ggplot(aes(logFC_49, -log10(adj.P.Val_49),
# color=highlight
)) +
geom_point(size=0.8, alpha=0.6) +
ggrepel::geom_text_repel(aes(label=label), segment.alpha = 0.2) +
xlab("logFC") + ylab("- log10(Adj. P value)") +
theme_bw(base_size = 22)
volcano_chol
GO term analysis
go_chol_up <- em_res_up_chol %>%
top_n(20, -log10(qvalue)) %>%
mutate(Term=factor(ID, levels=rev(unique(ID)))) %>%
ggplot(aes(Term, -log10(qvalue))) +
geom_point() +
coord_flip() +
xlab("GO Biological Function") + ylab("-log10(Adj. p-value)") +
theme_bw(base_size=18) +
ggtitle("Cirrhotic cholangiocytes")
go_chol_up
em_res_up_chol
Assemble figure
fig4_bottom <- ((fig4_bleft + plot_layout()) |
((fig4_bright1 + plot_layout(tag_level = 'keep')) / (fig4_bbright + plot_layout())) +
plot_layout(heights = c(1,1.6))
) +
plot_layout(widths=c(1,1.4))
(fig4_top / fig4_bottom) +
plot_layout(heights=c(1,1.8)) +
ggsave("~/mount/gdrive/milo/Figures/liver_v2/fig4_raw.pdf", height = 26, width = 24, useDingbats=FALSE)
# ggsave("~/mount/gdrive/milo/Figures/liver_v2/fig4_raw.png", height = 26, width = 24, useDingbats=FALSE)
# ggsave("~/milo/ms/figures/figs/figure5.pdf", height = 26, width = 22, useDingbats=FALSE)
Assemble supplementary figure
p1 <- plot_grid( go_endo_up+ theme(plot.title = element_text(hjust = 1),
axis.title.x = element_text(hjust = 1)),
go_endo_down+ theme(plot.title = element_text(hjust = 1),
axis.title.x = element_text(hjust = 1)),
label_size = 18,
ncol=1,
rel_heights = c(2,2),
labels = c("A", "B","C"))
p1
chol_emb <- (chol_umap + chol_gr ) +
plot_layout(widths = c(1,2),
guides = "collect"
)
plot_grid(
go_endo_up+ theme(plot.title = element_text(hjust = 1),
axis.title.x = element_text(hjust = 1)),
go_endo_down+ theme(plot.title = element_text(hjust = 1),
axis.title.x = element_text(hjust = 1)),
label_size = 18,
ncol=1,
rel_heights = c(2,2), rel_widths = c(2,2),
labels = c("A", "B")
) +
ggsave("~/mount/gdrive/milo/Figures/liver_v2/suppl_fig_endo.pdf", height = 12, width=12) +
ggsave("~/mount/gdrive/milo/Figures/liver_v2/suppl_fig_endo.png", height = 12, width=12)
plot_grid(plot_grid(chol_umap, chol_gr, volcano_chol, nrow=1,rel_widths = c(1,2,2),
label_size = 18,
labels = c("A","B","C")),
go_chol_up + theme(plot.title = element_text(hjust = 1),
axis.title.x = element_text(hjust = 1)),
ncol=1,
rel_heights = c(1,1),
label_size = 18,
labels=c("",'D')) +
ggsave("~/mount/gdrive/milo/Figures/liver_v2/suppl_fig7.pdf", height = 13, width=14) +
ggsave("~/mount/gdrive/milo/Figures/liver_v2/suppl_fig7.png", height = 13, width=14)
---
title: "Milo: liver cirrhosis analysis"
output: 
  html_notebook:
    code_folding: hide
---

## Introduction

In this notebook we demonstrate how to use Milo to detect abherrant cell states in diseased tissues, using a dataset of hepatic non-parenchymal cells isolated from 5 healthy and 5 cirrhotic human livers. [Ramachandran et al. 2019](https://www.nature.com/articles/s41586-019-1631-3#Sec1) (GEO accessiion: GSE136103).

## Installation
#### Added by DRT 2021-12-03

```{r Installation}
if (!requireNamespace("BiocManager", quietly = TRUE))
    install.packages("BiocManager")

bioc_pkgs <- setdiff(
            c("SingleCellExperiment","scater","scran","Seurat"),
            installed.packages()[,1]
        )

BiocManager::install(bioc_pkgs)

getPackageIfNeeded <- function(pkg) {
  if (!require(pkg, character.only=TRUE))
    install.packages(pkgs=pkg, dependencies=TRUE)
}

# determine missing R libraries
pkgs <- setdiff(
            c("tidyverse","miloR","patchwork","igraph",
              "RColorBrewer","cowplot","devtools"),
            installed.packages()[,1]
        )

invisible(sapply(pkgs,getPackageIfNeeded))

if(!require(c("miloR"), character.only=TRUE)) devtools::install_github("MarioniLab/miloR")
```


```{r}
# devtools::install_github("MarioniLab/miloR")
suppressPackageStartupMessages({
  library(tidyverse)
  # library(irlba)
  # library(DropletUtils)
  library(scater)
  library(scran)
  # library(Seurat) ## just 4 loading the object
  library(miloR)
  library(SingleCellExperiment)
  library(patchwork)
  library(igraph)
  library(RColorBrewer)
  library(cowplot)
  })
```

## Load data

We downloaded the dataset and annotations stored in Seurat object from [here](https://datashare.is.ed.ac.uk/handle/10283/3433), as indicated by the authors.

```{r}
# load("/nfs/team205/ed6/data/Ramachandran2019_liver/tissue.rdata")
load(url("https://www.dropbox.com/s/bq816h74gmh84gu/tissue.rdata?dl=1"))
## Convert to SingleCellExperiment
liver_sce <- SingleCellExperiment(assay = list(counts=tissue@raw.data, logcounts=tissue@data),
                                  colData = tissue@meta.data)

liver_sce
```

## Preprocessing

We use the same number of highly variable genes and principal components used by the authors of the original study. 

### Feature selection

Select highly variable genes

```{r}
dec_liver <- modelGeneVar(liver_sce)

fit_liver <- metadata(dec_liver)
plot(fit_liver$mean, fit_liver$var, xlab="Mean of log-expression",
    ylab="Variance of log-expression")

hvgs <- getTopHVGs(dec_liver, n=3000)
```

### Dimensionality reduction

```{r, fig.height=8, fig.width=8}
set.seed(42)
liver_sce <- runPCA(liver_sce, subset_row=hvgs, ncomponents=11)
liver_sce <- runUMAP(liver_sce, dimred="PCA", ncomponents=2)

scater::plotUMAP(liver_sce, colour_by="condition", point_alpha=1,  point_size=0.5)
scater::plotUMAP(liver_sce, colour_by="dataset", point_alpha=0.3,  point_size=0.5)
scater::plotUMAP(liver_sce, colour_by="annotation_lineage", point_alpha=0.3,  point_size=0.5, text_by='annotation_lineage')
```

Notably, this dataset doesn't appear to display a batch effect

### Note: This step is not required. It generates a large file (even larger than raw data) and then reloads it. 
```{r save-load intermed file 1, eval=FALSE, include=TRUE, echo=TRUE}
saveRDS(liver_sce, "~/dropbox-vu/temp/milo_data/Ramachandran2019_liver/liver_SCE_20210225.RDS")
liver_sce <- readRDS(url("https://www.dropbox.com/s/z0aopf616b1urvj/liver_SCE_20210225.RDS?dl=1"))
```

## Differential Abundance analysis with Milo

We test for differential abundance between healthy and cirrhotic livers. We start by defining neighbourhoods with refined sampling on the KNN graph. We inspect the size of neighbourhoods.

```{r}
liver_milo <- Milo(liver_sce)

## Build KNN graph
liver_milo <- buildGraph(liver_milo, d = 11, k=30)

## Compute neighbourhoods with refined sampling
liver_milo <- makeNhoods(liver_milo, k=30, d=11, prop = 0.05, refined=TRUE)
plotNhoodSizeHist(liver_milo, bins=150)
```

Then we make a design matrix for the differential test, assigning samples to biological conditions.

```{r}
colData(liver_milo)[['sort']] <- str_remove(colData(liver_milo)[['dataset']], ".+_")
colData(liver_milo)[['sort']] <- str_remove(colData(liver_milo)[['sort']], "A|B")

liver_meta <- as_tibble(colData(liver_milo)[,c("dataset","condition", 'sort')])
liver_meta <- distinct(liver_meta) %>%
  mutate(condition=factor(condition, levels=c("Uninjured", "Cirrhotic"))) %>%
  column_to_rownames("dataset")

```

Now we can count cells in neighbourhoods and perform the DA test.

```{r}
liver_milo <- countCells(liver_milo, samples = "dataset", meta.data = data.frame(colData(liver_milo)[,c("dataset","condition",'sort')]) )
liver_milo <- calcNhoodDistance(liver_milo, d=11)
milo_res <- testNhoods(liver_milo, design = ~ condition, design.df = liver_meta[colnames(nhoodCounts(liver_milo)),])
milo_res_sort <- testNhoods(liver_milo, design = ~ sort + condition, design.df = 
                                liver_meta[colnames(nhoodCounts(liver_milo)),])
```

```{r}
compare_da_df <- left_join(milo_res_sort, milo_res, by="Nhood", suffix=c("_sort", "_nosort")) %>%
  {annotateNhoods(liver_milo, ., 'annotation_lineage')} 

compare_da_df %>%
  ggplot(aes(-log10(SpatialFDR_sort), -log10(SpatialFDR_nosort))) +
  geom_point(size=0.8) +
  geom_point(data=. %>% filter(annotation_lineage=="Endothelia"), color="red")
plot(milo_res_sort$SpatialFDR, milo_res$SpatialFDR)
```


## Exploration of Milo DA results

We can start by looking at some basic stats

```{r}
pval_hist <- milo_res %>%
  ggplot(aes(PValue)) +
  geom_histogram(bins=50) +
  theme_bw(base_size=14)

volcano <-
  milo_res %>%
  ggplot(aes(logFC, -log10(SpatialFDR))) +
  geom_point(size=0.4, alpha=0.2) +
  geom_hline(yintercept = -log10(0.1)) +
  xlab("log-Fold Change") +
  theme_bw(base_size=14)

pval_hist + volcano
```

The distribution of P-values looks sensible and from the volcano plot we can see that we have identified some DA neighbourhoods at 10% FDR.

We can visualize DA neighbourhoods building an abstracted graph

```{r, fig.width=14, fig.height=10}
liver_milo <- buildNhoodGraph(liver_milo)
plotNhoodGraphDA(liver_milo, milo_res, alpha = 0.1, size_range=c(2,6))
```

### Note: This step is not required. It generates a large file (even larger than raw data) and then reloads it. 
```{r save intermed files 2, eval=FALSE, include=TRUE, echo=TRUE}
## Save milo object and results
saveRDS(liver_milo,"~/dropbox-vu/temp/milo_data/Ramachandran2019_liver/liver_milo_20210225.RDS")
write_csv(milo_res,"~/dropbox-vu/temp/milo_data/Ramachandran2019_liver/liver_results_20210225.csv")

```
```{r load intermed files 2, eval=FALSE, include=TRUE, echo=TRUE}
# liver_milo <- readRDS("~/liver_milo_20201008.RDS")
liver_milo <- readRDS(url("https://www.dropbox.com/s/xdp07789c5hoen3/liver_milo_20210225.RDS?dl=1"))
# milo_res <- read_csv("/nfs/team205/ed6/data/Ramachandran2019_liver/liver_results_20201008.csv")
milo_res <- read_csv(url("https://www.dropbox.com/s/i1l3aep1py5wirf/liver_results_20210225.csv?dl=1"))
```

### NOTE: `hvgs` (presumably highly variable genes) cannot be loaded since file not provided
```{r}
## Load hvgs 
hvgs <- scan("~/data/Ramachandran2019_liver/liver_milo_hvgs.txt", "")
```

### Attempting to generate a set of highly variable genes
```{r}
#
```

Making figures for the manuscript

```{r, fig.width=15, fig.height=10}
library(ggrastr)
colourCount = length(unique(liver_milo$annotation_lineage))
getPalette = colorRampPalette(brewer.pal(9, "Set2"))

umap_df <- data.frame(reducedDim(liver_milo, "UMAP"))
colnames(umap_df) <- c("UMAP_1", "UMAP_2")

umap1 <- cbind(umap_df, annotation_lineage=liver_milo$annotation_lineage) %>%
  ggplot(aes(UMAP_1, UMAP_2, color=as.character(annotation_lineage))) +
  geom_point_rast(size=0.1, alpha=0.5, raster.dpi = 800) +
  ggrepel::geom_text_repel(data = . %>%
              group_by(annotation_lineage) %>%
              summarise(UMAP_1=mean(UMAP_1), UMAP_2=mean(UMAP_2)),
            aes(label=annotation_lineage), color="black", size=6
            ) +
  scale_color_manual(values=getPalette(colourCount)) +
  guides(color="none") +
  xlab("UMAP1") + ylab("UMAP2") +
  coord_fixed() +
  theme_classic(base_size = 22) +
  theme(axis.text = element_blank(), axis.ticks = element_blank())

umap2 <-
  cbind(umap_df, condition=as.character(liver_milo$condition)) %>%
  ggplot(aes(UMAP_1, UMAP_2, color=condition)) +
  geom_point_rast(size=0.1, alpha=0.5, raster.dpi = 800) +
  scale_color_brewer(palette="Set1", name='') +
  xlab("UMAP1") + ylab("UMAP2") +
  coord_fixed() +
  guides(color='none') +
  facet_wrap(condition~., ncol=1) +
  theme_nothing(font_size = 22) +
  theme(axis.text = element_blank(), axis.ticks = element_blank(), legend.position=c(0.9,0.9),
        strip.background = element_rect(color=NA), strip.text = element_text(size=22))

nh_graph_pl <- plotNhoodGraphDA(liver_milo, milo_res, alpha = 0.1, size_range=c(1,4)) +
  theme(legend.text = element_text(size=20), legend.title = element_text(size=22)) +
  coord_fixed()

# nh_graph_pl + ggsave("~/mount/gdrive/milo/Figures/liver_v2/liver_graph.pdf", height = 7, width = 8)
ggsave("~/dropbox-vu/temp/milo_output/liver_v2/liver_graph.pdf", height = 7, width = 8)

fig4_top <- (umap1 | umap2 | nh_graph_pl) +
  plot_layout(widths = c(3,1,3))

fig4_top
```

### Explore DA neighbourhoods by cell type

Next, we can check the cell types where we observe most differences between healthy and cirrhotic cells, by taking the most frequent cell type in each neighbourhood.

```{r, fig.width=9, fig.height=10}
milo_res <- milo_res[,!str_detect(colnames(milo_res), "annotation_lineage")]

# Add annotation of most frequent cell type per nhood to milo results table
milo_res <- annotateNhoods(liver_milo, milo_res, "annotation_indepth")
anno_df <- data.frame(liver_milo@colData) %>%
  distinct(annotation_lineage, annotation_indepth)
milo_res <- left_join(milo_res, anno_df, by="annotation_indepth")
```

We first check that neighbourhoods are sufficiently homogeneous

```{r}
frac_hist <- ggplot(milo_res, aes(annotation_indepth_fraction)) +
  geom_histogram(bins=30) +
  xlab("Fraction of cells in \nmost abundant cluster") +
  ylab("# neighbourhoods") +
  theme_bw(base_size=14)

frac_hist
```

Filter nhoods with homogeneous composition

```{r}
milo_res$annotation_indepth[milo_res$annotation_indepth_fraction < 0.6] <- NA
milo_res$annotation_lineage[milo_res$annotation_indepth_fraction < 0.6] <- NA
```


I can recover all the clusters where DA was detected in the original paper

```{r, fig.width=10, fig.height=10, warning=FALSE, message=FALSE}
group.by = "annotation_indepth"
paper_DA <- list(cirrhotic=c("MPs (4)","MPs (5)",
                             "Endothelia (6)", "Endothelia (7)",
                             "Mes (3)",
                             "Tcells (2)",
                             "Myofibroblasts"
                             ),
                 healthy=c("MPs (7)",
                           "Endothelia (1)",
                           "Tcells (1)", "Tcells (3)","Tcells (1)",
                           "ILCs (1)"
                           )
                 )

expDA_df <- bind_rows(
  data.frame(annotation_indepth = paper_DA[["cirrhotic"]], pred_DA="cirrhotic"),
  data.frame(annotation_indepth = paper_DA[["healthy"]], pred_DA="healthy")
  )

pl1 <- milo_res %>%
  left_join(expDA_df) %>%
  mutate(is_signif = ifelse(SpatialFDR < 0.1, 1, 0)) %>%
  mutate(logFC_color = ifelse(is_signif==1, logFC, NA)) %>%
  arrange(annotation_lineage) %>%
  mutate(Nhood=factor(Nhood, levels=unique(Nhood))) %>%
  filter(!is.na(annotation_lineage)) %>%
  ggplot(aes(annotation_indepth, logFC, color=logFC_color)) +
  scale_color_gradient2() +
  guides(color="none") +
  xlab(group.by) + ylab("Log Fold Change") +
  ggbeeswarm::geom_quasirandom(alpha=1) +
  coord_flip() +
  facet_grid(annotation_lineage~., scales="free", space="free") +
  theme_bw(base_size=22) +
  theme(strip.text.y =  element_text(angle=0),
        axis.title.y = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(),
        )

pl2 <- milo_res %>%
  left_join(expDA_df) %>%
  # dplyr::filter(!is.na(pred_DA)) %>%
  group_by(annotation_indepth) %>%
  summarise(pred_DA=dplyr::first(pred_DA), annotation_lineage=dplyr::first(annotation_lineage)) %>%
  mutate(end=ifelse(pred_DA=="healthy", 0, 1),
         start=ifelse(pred_DA=="healthy", 1, 0)) %>%
  filter(!is.na(annotation_lineage)) %>%
  ggplot(aes(annotation_indepth, start, xend = annotation_indepth, yend = end, color=pred_DA)) +
  geom_segment(size=1,arrow=arrow(length = unit(0.1, "npc"), type="closed")) +
  coord_flip() +
  xlab("annotation") +
  facet_grid(annotation_lineage~.,
    # annotation_lineage~"Ramachandran et al.\nDA predictions",
             scales="free", space="free") +
  # guides(color="none") +
  scale_color_brewer(palette="Set1", direction = -1,
                     labels=c("enriched in cirrhotic", "enriched in healthy"),
                     na.translate = F,
                     name="Ramachandran et al.\nDA predictions") +
  guides(color=guide_legend(ncol = 1)) +
  theme_bw(base_size=22) +
  ylim(-0.1,1.1) +
  theme(strip.text.y = element_blank(),strip.text.x = element_text(angle=90),
        plot.margin = unit(c(0,0,0,0), "cm"), panel.grid = element_blank(),
        axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank(),
        legend.position = "bottom")

fig4_bleft <- (pl2 + pl1 +
  plot_layout(widths=c(1,10), guides = "collect") & theme(legend.position = 'top', legend.justification = 0))

# ggsave("~/mount/gdrive/milo/Figures/liver_v2/liver_DAcomparison.pdf", width=8, height = 13)
ggsave("~/dropbox-vu/temp/milo_output/liver_v2/liver_DAcomparison.pdf", width=8, height = 13)
```

### Close-up on Endothelial lineage

```{r}
endo_milo <- scater::runUMAP(liver_milo[,liver_milo$annotation_lineage=="Endothelia"],  dimred='PCA')
plotUMAP(endo_milo, colour_by = "annotation_indepth")
```

```{r}
umap_df <- data.frame(reducedDim(endo_milo, "UMAP"))
colnames(umap_df) <- c("UMAP_1", "UMAP_2")

endo_umap <- cbind(umap_df, condition=endo_milo$condition) %>%
   ggplot(aes(UMAP_1, UMAP_2, color=condition)) +
  geom_point(size=0.3, alpha=0.5) +
  scale_color_brewer(palette="Set1", name='') +
  xlab("UMAP1") + ylab("UMAP2") +
  coord_fixed() +
  guides(color="none") +
  facet_wrap(condition~., ncol=1) +
  theme_nothing() +
  theme(axis.text = element_blank(), axis.ticks = element_blank(), legend.position=c(0.9,0.9),
        strip.background = element_rect(color=NA), strip.text = element_text(size=22))
```

```{r, fig.width=8, fig.height=4, message=FALSE, warning=FALSE}
liver_milo2 <- liver_milo
subset.nhoods <- str_detect(milo_res$annotation_indepth, "Endo")
reducedDim(liver_milo2, "UMAP")[colnames(endo_milo),] <- reducedDim(endo_milo, "UMAP") 

endo_gr <-
  plotNhoodGraphDA(
  liver_milo2, milo_res,
  subset.nhoods = which(milo_res$annotation_lineage == "Endothelia"), 
  size_range=c(1,4),
  # ) =)[1:(length()-1)], 
  alpha = 0.1
  )  +
   theme(legend.text = element_text(size=20), legend.title = element_text(size=22))
  
# liver_milo2 <- liver_milo
# subset.nhoods <- str_detect(milo_res$annotation_indepth, "Endo")
# reducedDim(liver_milo2, "UMAP")[colnames(endo_milo),] <- reducedDim(endo_milo, "UMAP") 
# endo_gr_groups <- plotNhoodGroups(liver_milo2, milo_res_endogroups[milo_res_endogroups$annotation_lineage=="Endothelia",], 
#                 show_groups = c("54", "70"),
#                 size_range=c(1,4),
#                 subset.nhoods = milo_res_endogroups$annotation_lineage=="Endothelia") +
#   scale_fill_manual(values=c("54"=brewer.pal(4, "Spectral")[2], "70"=brewer.pal(4, "Spectral")[3]), 
#                     labels=c("54"="Uninjured group", '70'= "Cirrhotic group"),
#                     na.value="white",
#                     name = "Nhood group"
#                     ) +
#   theme(legend.text = element_text(size=20), legend.title = element_text(size=22))

fig4_bright1 <- ((endo_umap + endo_gr ) + 
  plot_layout(widths = c(1,2), 
                guides = "collect"
                )) 
fig4_bright1
```

<!-- ```{r} -->
<!-- nh_graph <- nhoodGraph(liver_milo)[subset.nhoods,subset.nhoods] -->
<!-- nh_graph <- graph_from_adjacency_matrix(nh_graph) -->

<!-- col_vals <- colData(liver_milo)[as.numeric(vertex_attr(nh_graph)$name), colour_by] -->
<!-- V(nh_graph)$colour_by <- ifelse(milo_res[subset.nhoods,"SpatialFDR"] > 0.1, 0, milo_res[subset.nhoods,"logFC"]) -->
<!-- ggraph(simplify(nh_graph)) + -->
<!--       geom_edge_link0(edge_colour = "grey66", edge_alpha=0.2)   + -->
<!--       geom_node_point(aes(fill = colour_by), shape=21, size=2) + -->
<!--   scale_fill_gradient2() -->
<!-- ``` -->


### Close-up on Cholangiocytes

```{r}
chol_milo <- scater::runUMAP(liver_milo[,liver_milo$annotation_lineage=="Cholangiocytes"],  dimred='PCA')
plotUMAP(chol_milo, colour_by = "annotation_indepth")

plotUMAP(chol_milo, colour_by = "percent.mito")
```

Filter out cells that show contamination from immune cells (expression of immune markers)

```{r}
keep <- logcounts(chol_milo)["CD19",] == 0 | logcounts(chol_milo)["MS4A1",] == 0
chol_milo <- chol_milo[,keep]
chol_milo <- scater::runUMAP(chol_milo,  dimred='PCA')

plotUMAP(chol_milo, colour_by = "annotation_indepth")
```

```{r, fig.width=10, fig.height=8}
umap_df <- data.frame(reducedDim(chol_milo, "UMAP"))
colnames(umap_df) <- c("UMAP_1", "UMAP_2")

chol_umap <- cbind(umap_df, condition=chol_milo$condition) %>%
   ggplot(aes(UMAP_1, UMAP_2, color=condition)) +
  geom_point(size=0.3, alpha=0.5) +
  scale_color_brewer(palette="Set1", name='') +
  xlab("UMAP1") + ylab("UMAP2") +
  coord_fixed() +
  guides(color="none") +
  facet_wrap(condition~., ncol=1) +
  theme_nothing() +
  theme(axis.text = element_blank(), axis.ticks = element_blank(), legend.position=c(0.9,0.9),
        strip.background = element_rect(color=NA), strip.text = element_text(size=22))

chol_umap
```

```{r, fig.width=8, fig.height=4, warning=FALSE, message=FALSE}
liver_milo2 <- liver_milo
subset.nhoods <- milo_res$annotation_lineage=="Cholangiocytes"
reducedDim(liver_milo2, "UMAP")[colnames(chol_milo),] <- reducedDim(chol_milo, "UMAP") 

chol_gr <-
  plotNhoodGraphDA(
  liver_milo2, milo_res,
  subset.nhoods = subset.nhoods,
  size_range=c(2,5),
  # ) =)[1:(length()-1)], 
  alpha = 0.1
  )  +
   theme(legend.text = element_text(size=22), legend.title = element_text(size=24))
  
(chol_umap + chol_gr ) + 
  plot_layout(widths = c(1,2), 
                guides = "collect"
                )
# fig4_bright1 +
#   ggsave("~/milo_output/liver_endoGraph.pdf", width=9, height = 5)  
ggsave("~/dropbox-vu/temp/milo_output/liver_endoGraph.pdf", width=9, height = 5)  
```

### Differential Gene Expression analysis

In a subset of lineages, we want to test for differential expression between neighbourhoods enriched in cirrhotic cells and neighbourhoods enriched

<!-- (Now coded in `miloR\R\testDiffExp.R`) -->

<!-- ```{r} -->
<!-- .perform_counts_dge <- function(exprs.data, test.model, gene.offset=gene.offset, -->
<!--                                 model.contrasts=NULL, n.coef=NULL){ -->

<!--     i.dge <- DGEList(counts=exprs.data, -->
<!--                      lib.size=log(colSums(exprs.data))) -->

<!--     if(isTRUE(gene.offset)){ -->
<!--         n.gene <- apply(exprs.data, 2, function(X) sum(X > 0)) -->
<!--         if(ncol(test.model) == 2){ -->
<!--             test.model <- cbind(test.model, n.gene) -->
<!--             colnames(test.model) <- c(colnames(test.model)[1:2], "NGenes") -->
<!--         } else if (ncol(test.model) > 2){ -->
<!--             test.model <- cbind(test.model[, 1], n.gene, test.model[, c(2:ncol(test.model))]) -->
<!--             colnames(test.model) <- c(colnames(test.model)[1], "NGenes", colnames(test.model[, c(2:ncol(test.model))])) -->
<!--         } else{ -->
<!--             if(ncol(test.model) < 2){ -->
<!--                 warning("Only one column in model matrix - must have at least 2. gene.offset forced to  FALSE") -->
<!--             } -->
<!--         } -->
<!--     } -->

<!--     i.dge <- estimateDisp(i.dge, test.model) -->
<!--     i.fit <- glmQLFit(i.dge, test.model, robust=TRUE) -->

<!--     if(!is.null(model.contrasts)){ -->
<!--         mod.constrast <- makeContrasts(contrasts=model.contrasts, levels=test.model) -->
<!--         i.res <- as.data.frame(topTags(glmQLFTest(i.fit, contrast=mod.constrast), -->
<!--                                        sort.by='none', n=Inf)) -->
<!--     } else{ -->
<!--         if(is.null(n.coef)){ -->
<!--             n.coef <- ncol(test.model) -->
<!--         } -->
<!--         i.res <- as.data.frame(topTags(glmQLFTest(i.fit, coef=n.coef), sort.by='none', n=Inf)) -->
<!--     } -->
<!--     return(i.res) -->
<!-- } -->

<!-- ``` -->

## NOTE: Code below here won't work unless `hvgs` is defined
### DRT: stopping run-through here
Should be able to run everything above.
----

Add nhood expression to speed-up plotting of heatmaps

```{r}
liver_milo <- calcNhoodExpression(liver_milo, assay = "logcounts", subset.row = hvgs)
```


## Endothelia

Rebuttal figure showcasing grouping

```{r, fig.height=5, fig.width=14}
set.seed(42)
milo_res_endogroups <- groupNhoods(liver_milo, milo_res, max.lfc.delta = 2, overlap = 1)

p1 <- plotNhoodGroups(liver_milo, milo_res_endogroups, 
                size_range=c(1,3)) 

milo_res_endogroups <- annotateNhoods(liver_milo, milo_res_endogroups, 'annotation_lineage')

p2 <- plotDAbeeswarm(milo_res_endogroups, group.by = 'NhoodGroup') +
  facet_grid(annotation_lineage~., scales="free", space="free")


## Plot expression in T cell neighbourhoods
markers_df <- read_csv("~/mount/gdrive/milo/STable3_Ramachandran.csv")
tcell_marker_genes <- 
  markers_df %>%
  filter(cluster %in% c("Tcell", "ILC")) %>%
  top_n(30, myAUC) %>%
  pull(gene)

p3 <- plotNhoodExpressionGroups(liver_milo, milo_res_endogroups, features = unique(tcell_marker_genes), 
                          subset.nhoods = milo_res_endogroups$NhoodGroup %in% c("3","10", "14"),
                          scale=TRUE, cluster_features = TRUE,show_rownames = TRUE
                          ) +
  theme(strip.text.x = element_text(angle=90))

```
```{r, fig.width=15, fig.height=10}
(((p1 + theme())/ (p3 + theme(strip.text = element_text(size=10, angle=45)))) + 
  plot_layout(heights = c(1.1,1), guides="collect"
              )| 
  (
    p2 + theme_bw(base_size=16) + theme(strip.text.y = element_text(angle=0))
    )) + 
  plot_layout(widths = c(1.4, 1)) +
  plot_annotation(tag_levels = c("A", "C", "B") ) +
  ggsave("~/mount/gdrive/milo/Figures/liver_v2/RFig_grouping.pdf", width=15, height = 12) +
  ggsave("~/mount/gdrive/milo/Figures/liver_v2/RFig_grouping.png", width=15, height = 12)
```

Group endothelial cells by logFC and DA results

```{r}
milo_res_endogroups$annotation_indepth[milo_res_endogroups$annotation_indepth_fraction < 0.6] <- NA
milo_res_endogroups$annotation_lineage[milo_res_endogroups$annotation_indepth_fraction < 0.6] <- NA

## Group neighbourhoods by DA outcome
milo_res_endogroups$NhoodGroup <- NA
milo_res_endogroups$NhoodGroup <- ifelse((milo_res_endogroups$annotation_lineage == "Endothelia") & (milo_res_endogroups$SpatialFDR < 0.1) & (milo_res_endogroups$logFC < -2.5), "54", milo_res_endogroups$NhoodGroup)
milo_res_endogroups$NhoodGroup <- ifelse((milo_res_endogroups$annotation_lineage == "Endothelia") & (milo_res_endogroups$SpatialFDR < 0.1) & (milo_res_endogroups$logFC > 2.5), "70", milo_res_endogroups$NhoodGroup)


liver_milo2 <- liver_milo
subset.nhoods <- str_detect(milo_res$annotation_indepth, "Endo")
reducedDim(liver_milo2, "UMAP")[colnames(endo_milo),] <- reducedDim(endo_milo, "UMAP") 
endo_gr_groups <- plotNhoodGroups(liver_milo2, milo_res_endogroups[milo_res_endogroups$annotation_lineage=="Endothelia",], 
                show_groups = c("54", "70"),
                size_range=c(1,4),
                subset.nhoods = milo_res_endogroups$annotation_lineage=="Endothelia") +
  scale_fill_manual(values=c("54"=brewer.pal(4, "Spectral")[2], "70"=brewer.pal(4, "Spectral")[3]), 
                    labels=c("54"="Uninjured group", '70'= "Cirrhotic group"),
                    na.value="white",
                    name = "Nhood group"
                    ) +
  theme(legend.text = element_text(size=20), legend.title = element_text(size=22))
```

```{r, fig.width=18, fig.height=5}
fig4_bright1 <- ((endo_umap + endo_gr) + 
  plot_layout(widths = c(1,2), guides="collect"
                )) &
  theme(legend.box = "horizontal", legend.position = "top", legend.direction = "vertical")
fig4_bright1
```


Calculate marker genes between the two groups
```{r}
mito_genes <- str_detect(hvgs, "^MT-")
markers_df <- findNhoodGroupMarkers(liver_milo, da.res = milo_res_endogroups, assay="counts",
                      subset.nhoods = (milo_res_endogroups$NhoodGroup %in% c("54", "70")),
                      subset.groups = c("54", "70"),
                      subset.row = hvgs[!mito_genes],
                      aggregate.samples = TRUE, sample_col = "dataset"
                      )

milo_res_endogroups[milo_res_endogroups$NhoodGroup %in% c("54", "70"),]

colnames(markers_df) <- str_replace(colnames(markers_df), "70", "cirr")
colnames(markers_df) <- str_replace(colnames(markers_df), "54", "uninj")
```

#### Visualize as volcano 

```{r, fig.height=6, fig.width=10, message=FALSE, warning=FALSE}

highlight_genes <- c("PLVAP", "VWA1", "ACKR1", "IL32",
                     "CLEC4G", "CLEC4M", "FCN2", "FCN3",
                     "LEF1")

marker.df <- markers_df
marker.df %>%
  mutate(label=ifelse(GeneID %in% highlight_genes, GeneID, NA)) %>%
  ggplot(aes(logFC_cirr, -log10(adj.P.Val_cirr), 
             # color=highlight
             )) + 
  geom_point() +
  geom_text(aes(label=label), color="red") +
  xlab("logFC") + ylab("- log10(Adj. P value)") +
  theme_bw(base_size = 22)
  
```


#### Visualize as heatmap 
(gene expression values are scaled between 0 and 1 for each gene)

```{r, fig.height=10, fig.width=12, message=FALSE, warning=FALSE}
marker_genes <- marker.df %>%
  dplyr::filter(adj.P.Val_cirr < 0.05) %>%
  pull(GeneID)

fig4_bbright <-
  plotNhoodExpressionDA(liver_milo, milo_res_endogroups, c(marker_genes), cluster_features = TRUE, assay = "counts",
                      alpha = 0.1,
                      scale_to_1 = TRUE,
                      subset.nhoods =  milo_res_endogroups$NhoodGroup %in% c("54", "70"),
                      # grid.space = "free",
                      highlight_features = highlight_genes, show_rownames = FALSE
                      ) +
  ylab("DE genes")+
  # facet_grid(.~NhoodGroup, scales="free", space="free")
   theme(legend.text = element_text(size=22), legend.title = element_text(size=24)) +
  plot_layout(heights = c(1,10)) & theme(legend.margin = margin(0,0,0,60), legend.background = element_blank())

  
pl3 <- fig4_bbright$data %>%
  ggplot(aes(logFC_rank, 1,fill=logFC)) +
  geom_tile() +
      theme_classic(base_size=16) +
    ylab("") +
  scale_fill_gradient2(name="DA logFC") +
    # scale_fill_manual(values=c("54"=brewer.pal(4, "Spectral")[2], "70"=brewer.pal(4, "Spectral")[3]), 
    #                 labels=c("54"="Uninjured group", '70'= "Cirrhotic group"),
    #                 na.value="white",
    #                 name = "Nhood group"
    #                 ) +
    scale_x_continuous(expand = c(0.01, 0)) +
    theme(axis.text = element_blank(), axis.ticks = element_blank(), axis.line = element_blank(), 
          axis.title = element_blank())

fig4_bbright <- pl3 / fig4_bbright  +
  plot_layout(heights = c(1,20))

fig4_bbright
```

### GO term analysis

```{r, echo=FALSE, warning=FALSE, message=FALSE}
# BiocManager::install('clusterProfiler')
# BiocManager::install('msigdbr')
library(clusterProfiler)
library(msigdbr)

m_df <- msigdbr(species = "Homo sapiens")
m_t2g <- msigdbr(species = "Homo sapiens", category = "C5", subcategory = "BP")  %>% 
  dplyr::select(gs_name, gene_symbol)
```

```{r, echo=FALSE, warning=FALSE, message=FALSE}
marker_genes_up <- marker.df %>%
  dplyr::filter(adj.P.Val_cirr < 0.05 & logFC_cirr > 0.5) %>%
  pull(GeneID) 

marker_genes_down <- marker.df %>%
  dplyr::filter(adj.P.Val_cirr < 0.05 & logFC_uninj > 0.5) %>%
  pull(GeneID)

em_up <- enricher(marker_genes_up, TERM2GENE=m_t2g, pAdjustMethod = "fdr", 
                  universe = hvgs
                  )
em_down <- enricher(marker_genes_down, TERM2GENE=m_t2g, pAdjustMethod = "fdr", 
                    universe = rownames(liver_milo)
                    )

em_res_up <- em_up@result[em_up@result$qvalue < 0.1,] %>%
  dplyr::select(- c(Description))
em_res_down <- em_down@result[em_down@result$qvalue < 0.1,] %>%
  dplyr::select(- c(Description))
```

```{r, fig.height=8, fig.width=15, warning=FALSE, message=FALSE}
go_endo_up <- em_res_up %>%
  top_n(30, -log10(qvalue)) %>%
   mutate(ID=ifelse(ID=='GO_ANTIGEN_PROCESSING_AND_PRESENTATION_OF_PEPTIDE_OR_POLYSACCHARIDE_ANTIGEN_VIA_MHC_CLASS_II', "GO_ANTIGEN_PRESENTATION_VIA_MHC_CLASS_II", ID)) %>%
  mutate(Term=factor(ID, levels=rev(unique(ID)))) %>%
  ggplot(aes(Term, -log10(qvalue))) +
  geom_point() +
  coord_flip() +
  xlab("GO Biological Function") + ylab("-log10(Adj. p-value)") +
  theme_bw(base_size=18) +
  ggtitle("Cirrhotic endothelia")

go_endo_down <- em_res_down %>%
  top_n(30, -log10(qvalue)) %>%
  mutate(ID=ifelse(ID=='GO_ANTIGEN_PROCESSING_AND_PRESENTATION_OF_PEPTIDE_OR_POLYSACCHARIDE_ANTIGEN_VIA_MHC_CLASS_II', "GO_ANTIGEN_PRESENTATION_VIA_MHC_CLASS_II", ID)) %>%
  mutate(Term=factor(ID, levels=rev(unique(ID)))) %>%
  ggplot(aes(Term, -log10(qvalue))) +
  geom_point() +
  coord_flip() +
  xlab("GO Biological Function") + ylab("-log10(Adj. p-value)") +
  theme_bw(base_size=18) +
  ggtitle("Uninjured endothelia")

go_endo_up
go_endo_down
```


```{r}
em_res_up
em_res_down
```

## Cholangiocytes

```{r}
set.seed(42)
milo_res_cholgroups <- groupNhoods(liver_milo, milo_res, max.lfc.delta = 0.5, overlap = 1)

## Group neighbourhoods by DA outcome
milo_res_cholgroups$NhoodGroup <- NA
milo_res_cholgroups$NhoodGroup <- ifelse((milo_res_cholgroups$annotation_lineage == "Cholangiocytes") & (milo_res_cholgroups$SpatialFDR < 0.1) & (milo_res_cholgroups$logFC < -2.5), "38", milo_res_cholgroups$NhoodGroup)
milo_res_cholgroups$NhoodGroup <- ifelse((milo_res_cholgroups$annotation_lineage == "Cholangiocytes") & (milo_res_cholgroups$SpatialFDR < 0.1) & (milo_res_cholgroups$logFC > 2.5), "49", milo_res_cholgroups$NhoodGroup)

liver_milo2 <- liver_milo
subset.nhoods <- str_detect(milo_res$annotation_indepth, "Chol")
reducedDim(liver_milo2, "UMAP")[colnames(chol_milo),] <- reducedDim(chol_milo, "UMAP") 
plotNhoodGroups(liver_milo2, milo_res_cholgroups[milo_res_cholgroups$annotation_lineage=="Cholangiocytes",], 
                show_groups = c("49","38"),
                subset.nhoods =  milo_res_cholgroups$annotation_lineage =="Cholangiocytes")

```

Calculate marker genes between the two groups
```{r}
## Filter genes expressed in cholangiocytes
# chol_hvgs <- hvgs[(counts(chol_milo)[hvgs,] > 0) %>% {rowSums(.)/ncol(chol_milo)} > 0.01]
mito_genes <- str_detect(hvgs, "^MT-")

markers_df <- findNhoodGroupMarkers(liver_milo, da.res = milo_res_cholgroups, assay="counts",
                      subset.nhoods = milo_res_cholgroups$NhoodGroup %in%c("49","38"),
                      subset.groups = c("49","38"),
                      subset.row = hvgs[!mito_genes],
                      aggregate.samples = TRUE, sample_col = "dataset"
                      )

markers_df 

milo_res_cholgroups[milo_res_cholgroups$NhoodGroup %in%c("49","38"),]
```

#### Visualize as volcano 

```{r, fig.height=6, fig.width=10, message=FALSE, warning=FALSE}
marker.df.chol <- markers_df

volcano_chol <-
  marker.df.chol %>%
  mutate(up=ifelse(logFC_49 > 0, "up", "down")) %>%
  group_by(up) %>%
  mutate(label=ifelse(rank(adj.P.Val_49) < 15, GeneID, NA)) %>%
  # mutate(label=ifelse((adj.P.Val_49 < 0.05 & logFC_49 < -3) | (adj.P.Val_49 < 0.05 & logFC_49 > 0), GeneID, NA)) %>%
  ggplot(aes(logFC_49, -log10(adj.P.Val_49), 
             # color=highlight
             )) + 
  geom_point(size=0.8, alpha=0.6) +
  ggrepel::geom_text_repel(aes(label=label), segment.alpha = 0.2) +
  xlab("logFC") + ylab("- log10(Adj. P value)") +
  theme_bw(base_size = 22)

volcano_chol  
  
```



### GO term analysis

```{r, echo=FALSE, warning=FALSE, message=FALSE}
marker_genes_chol <- marker.df.chol %>%
  dplyr::filter(adj.P.Val_49 < 0.05 & logFC_49 > 0) %>%
  pull(GeneID)

em_up_chol <- enricher(marker_genes_chol, TERM2GENE=m_t2g, pAdjustMethod = "fdr", 
                  universe = rownames(liver_milo)
                  )

em_res_up_chol <- em_up_chol@result[em_up_chol@result$qvalue < 0.1,] %>%
  dplyr::select(- c(Description))
```

```{r, fig.height=8, fig.width=15, warning=FALSE, message=FALSE}
go_chol_up <- em_res_up_chol %>%
  top_n(20, -log10(qvalue)) %>%
  mutate(Term=factor(ID, levels=rev(unique(ID)))) %>%
  ggplot(aes(Term, -log10(qvalue))) +
  geom_point() +
  coord_flip() +
  xlab("GO Biological Function") + ylab("-log10(Adj. p-value)") +
  theme_bw(base_size=18) +
  ggtitle("Cirrhotic cholangiocytes")

go_chol_up
```

```{r}
em_res_up_chol
```
```{r, echo=FALSE, warning=FALSE, message=FALSE}
marker_genes_chol_down <- marker.df.chol %>%
  dplyr::filter(adj.P.Val_49 < 0.05 & logFC_49 < 0) %>%
  pull(GeneID)

em_down_chol <- enricher(marker_genes_chol_down, TERM2GENE=m_t2g, pAdjustMethod = "fdr", 
                  universe = rownames(liver_milo)
                  )

em_res_down_chol <- em_down_chol@result[em_down_chol@result$qvalue < 0.1,] %>%
  dplyr::select(- c(Description))
```


---

Assemble figure
```{r, fig.height=25, fig.width=19}
fig4_bottom <- ((fig4_bleft + plot_layout()) |
      ((fig4_bright1 + plot_layout(tag_level = 'keep')) / (fig4_bbright + plot_layout())) +
      plot_layout(heights = c(1,1.6))
   ) +
  plot_layout(widths=c(1,1.4))

(fig4_top / fig4_bottom) +
  plot_layout(heights=c(1,1.8))  +
  ggsave("~/mount/gdrive/milo/Figures/liver_v2/fig4_raw.pdf", height = 26, width = 24, useDingbats=FALSE) 
  # ggsave("~/mount/gdrive/milo/Figures/liver_v2/fig4_raw.png", height = 26, width = 24, useDingbats=FALSE)
  # ggsave("~/milo/ms/figures/figs/figure5.pdf", height = 26, width = 22, useDingbats=FALSE)
```

Assemble supplementary figure

```{r, fig.width=25, fig.height=7}
p1 <- plot_grid( go_endo_up+ theme(plot.title = element_text(hjust = 1),
                                   axis.title.x = element_text(hjust = 1)), 
                 go_endo_down+ theme(plot.title = element_text(hjust = 1),
                                     axis.title.x = element_text(hjust = 1)), 
                 label_size = 18,
                 ncol=1,
                 rel_heights = c(2,2),
                labels = c("A", "B","C"))

p1

chol_emb <- (chol_umap + chol_gr ) + 
  plot_layout(widths = c(1,2), 
                guides = "collect"
                )

```

```{r, fig.height=10, fig.width=8}
plot_grid(
  go_endo_up+ theme(plot.title = element_text(hjust = 1),
                                   axis.title.x = element_text(hjust = 1)), 
                 go_endo_down+ theme(plot.title = element_text(hjust = 1),
                                     axis.title.x = element_text(hjust = 1)), 
                 label_size = 18,
                 ncol=1,
                 rel_heights = c(2,2), rel_widths = c(2,2),
                labels = c("A", "B")
  ) +
  ggsave("~/mount/gdrive/milo/Figures/liver_v2/suppl_fig_endo.pdf", height = 12, width=12) +
  ggsave("~/mount/gdrive/milo/Figures/liver_v2/suppl_fig_endo.png", height = 12, width=12)


```
```{r, fig.height=10, fig.width=8}
plot_grid(plot_grid(chol_umap, chol_gr, volcano_chol, nrow=1,rel_widths = c(1,2,2),
                          label_size = 18,
                labels = c("A","B","C")),
                go_chol_up + theme(plot.title = element_text(hjust = 1),
                                   axis.title.x = element_text(hjust = 1)), 
                ncol=1,
                rel_heights = c(1,1),
                 label_size = 18,
                labels=c("",'D')) +
   ggsave("~/mount/gdrive/milo/Figures/liver_v2/suppl_fig7.pdf", height = 13, width=14) +
  ggsave("~/mount/gdrive/milo/Figures/liver_v2/suppl_fig7.png", height = 13, width=14) 
```
